

import requests
from bs4 import BeautifulSoup



# Index page of the book; it is expected to list one link per chapter.
url = 'https://www.shicimingju.com/book/sanguoyanyi.html'

# NOTE(review): requests selects a proxy by the lower-case URL scheme
# ('http' / 'https'), so this upper-case 'HTTP' key is probably never matched
# for the https:// URLs used in this script -- confirm whether a proxy is
# actually wanted before changing the key.
proxy = {
    'HTTP': '123.7.17.237:8060'
}

# Browser-like User-Agent for the HTTP requests.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36',
}

# `headers` must be passed by keyword: the second positional parameter of
# requests.get() is `params`, which would append the User-Agent to the query
# string instead of sending it as a request header.
index_response = requests.get(url, headers=headers, proxies=proxy)
# Let requests guess the charset from the body so the text decodes correctly.
index_response.encoding = index_response.apparent_encoding
page_text = index_response.text

soup = BeautifulSoup(page_text, 'lxml')

# Each <li> under the table-of-contents list holds the link to one chapter.
li_list = soup.select('.book-mulu > ul> li')

# Download every chapter listed in `li_list` and write them all into a single
# UTF-8 text file, one "title:content" record per chapter.  The `with` block
# guarantees the file is flushed and closed even if a request or parse fails.
with open('./sanguo.txt', 'w', encoding='utf-8') as fp:
    for li in li_list:
        link = li.a
        if link is None or not link.get('href'):
            # List item without a usable chapter link: nothing to download.
            continue

        title = link.text
        detail_url = 'https://www.shicimingju.com' + link['href']

        detail_response = requests.get(url=detail_url, headers=headers, proxies=proxy)
        # Let requests guess the charset from the body so the text decodes correctly.
        detail_response.encoding = detail_response.apparent_encoding
        detail_page_text = detail_response.text

        detail_soup = BeautifulSoup(detail_page_text, 'lxml')
        div_tag = detail_soup.find('div', class_='chapter_content')
        if div_tag is None:
            # find() returns None when the page has no chapter body (for
            # example an error page); skip it instead of crashing the run.
            print(title, '爬取失败：未找到章节内容')
            continue

        content = div_tag.text
        fp.write(title + ':' + content + '\n')

        print(title, '爬取成功！')


